2.4 輸入指令
# Load the raw rating data from disk.
sprating <- read.csv("D:/ratingData.csv")

# Normalise column types: gvkey is kept as text (the identifier is not a
# quantity), and datadate is parsed into a Date so it can be compared and
# joined against calendar dates.
sprating[["gvkey"]] <- as.character(sprating[["gvkey"]])
sprating[["datadate"]] <- as.Date(sprating[["datadate"]])

# Distinct company identifiers, in order of first appearance.
gvkey <- unique(sprating[["gvkey"]])
# Expand the rating history of one company into a month-end panel.
#
# Args:
#   df: rating rows for a single gvkey. The columns gvkey, splticrm and
#       datadate are read; datadate must already be a Date so that it can be
#       joined against the month-end calendar.
#
# Returns:
#   A data frame with the columns gvkey, splticrm, datadate, country (in that
#   order), sorted by datadate. It contains every month-end from 1990-01-31 to
#   2022-07-31 plus any input dates that are not on that calendar. Missing
#   splticrm values are filled with the most recent earlier rating; gvkey is
#   set to the first gvkey of `df` and country is always "USA".
FillMisMonthData <- function(df) {
  # Attached inside the function so the function works even when the caller
  # has not loaded these packages itself.
  library(dplyr)
  library(tidyr)

  gvkeyid <- df$gvkey[1]

  # 391 month-ends: the day before the first of each month, 1990-02 .. 2022-08.
  month_ends <- seq(as.Date("1990-02-01"), length.out = 391, by = "month") - 1
  df_time <- data.frame(datadate = month_ends)

  # Explicit key: the calendar only shares the datadate column with df.
  data_eotm <- full_join(df_time, df, by = "datadate")

  # full_join() puts input rows that are not on the calendar (dates before
  # 1990, after 2022-07, or not at a month-end) after all calendar rows.
  # Sort chronologically first, otherwise "last seen" filling would skip
  # those ratings and fill the trailing rows from the wrong observation.
  data_eotm <- data_eotm %>%
    arrange(datadate) %>%
    fill(splticrm, .direction = "down")

  # Constant columns for every row, including the calendar-only rows.
  data_eotm$gvkey <- gvkeyid
  data_eotm$country <- 'USA'

  # Fixed output column order.
  select(data_eotm, gvkey, splticrm, datadate, country)
}
# Apply FillMisMonthData to each gvkey and stack the per-company panels.
library(progress)
pb <- progress_bar$new(total = length(gvkey))

# Collect the pieces in a list and bind them once at the end; calling rbind()
# inside the loop would re-copy the accumulated result on every iteration.
filled_by_gvkey <- lapply(gvkey, function(id) {
  # %in% never yields NA, so rows with a missing gvkey are not pulled in as
  # all-NA rows the way `==` subsetting would.
  filled <- FillMisMonthData(sprating[sprating$gvkey %in% id, ])
  pb$tick()
  filled
})
sprating_fillmismonth <- do.call(rbind, filled_by_gvkey)

# Reduce the date to year and month only: yyyy-mm-dd -> yyyymm.
sprating_fillmismonth$datadate <- format(
  as.Date(sprating_fillmismonth$datadate, format = "%Y-%m-%d"),
  "%Y%m"
)

# Export as tab-separated text: no row names, no quoting, NA written as empty.
write.table(
  sprating_fillmismonth,
  'D:/sprating_EndOfTheMonth.txt',
  na = "",
  row.names = FALSE,
  quote = FALSE,
  sep = "\t"
)
